*********************************************************************
*This file cleans IFR data. Adapted version of Pascual Restrepo, JPE*
*********************************************************************

use "$raw_data_automation/robots_all.dta", clear

rename (robots_market robots_operational) (flow stock)
replace industry = "all" if industry == "000-All Industries"
replace industry = "agriculture" if industry == "A-B-Agriculture, forestry, fishing"
replace industry = "mining" if industry == "C-Mining and quarrying"
replace industry = "manufacturing" if industry == "D-Manufacturing"
replace industry = "food" if industry == "10-12-Food and beverages"
replace industry = "textiles" if industry == "13-15-Textiles"
replace industry = "furniture" if industry == "16-Wood and furniture"
replace industry = "paper" if industry == "17-18-Paper"
replace industry = "petrochemicals" if industry == "19-22-Plastic and chemical products"
replace industry = "petrochemicals_pharma" if industry == "19-Pharmaceuticals, cosmetics"
replace industry = "petrochemicals_chemicals" if industry == "20-21-other chemical products n.e.c."
replace industry = "petrochemicals_plastic" if industry == "22-Rubber and plastic products (non-automotive)"
replace industry = "petrochemicals_unspecified" if industry == "229-Chemical products, unspecified"
replace industry = "mineral" if industry == "23-Glass, ceramics, stone, mineral products (non-auto"
replace industry = "metal" if industry == "24-28-Metal"
replace industry = "metal_basic" if industry == "24-Basic metals"
replace industry = "metal_products" if industry == "25-Metal products (non-automotive)"
replace industry = "metal_machinery" if industry == "28-Industrial machinery"
replace industry = "metal_unspecified" if industry == "289-Metal, unspecified"
replace industry = "electronics" if industry == "26-27-Electrical/electronics"
replace industry = "electronics_appliances" if industry == "275-Household/domestic appliances"
replace industry = "electronics_machinery" if industry == "271-Electrical machinery n.e.c. (non-automotive)"
replace industry = "electronics_components" if industry == "260-Electronic components/devices"
replace industry = "electronics_conductors" if industry == "261-Semiconductors, LCD, LED"
replace industry = "electronics_computers" if industry == "262-Computers and peripheral equipment"
replace industry = "electronics_communication" if industry == "263-Info communication equipment, domestic and prof. ("
replace industry = "electronics_medical" if industry == "265-Medical, precision, optical instruments"
replace industry = "electronics_unspecified" if industry == "279-Electrical/electronics unspecified"
replace industry = "automotive" if industry == "29-Automotive"
replace industry = "automotive_bodies" if industry == "291-Motor vehicles, engines and bodies"
replace industry = "automotive_parts" if industry == "293-Automotive parts"
replace industry = "automotive_parts_metal" if industry == "2931-Metal (AutoParts)"
replace industry = "automotive_parts_rubber" if industry == "2932-Rubber and plastic (AutoParts)"
replace industry = "automotive_parts_electric" if industry == "2933-Electrical/electronic (AutoParts)"
replace industry = "automotive_parts_glass" if industry == "2934-Glass (AutoParts)"
replace industry = "automotive_parts_other" if industry == "2939-Other (AutoParts)"
replace industry = "automotive_parts_unspecified" if industry == "2999-Unspecified AutoParts"
replace industry = "automotive_unspecified" if industry == "299-Automotive unspecified"
replace industry = "vehicles_other" if industry == "30-Other vehicles"
replace industry = "manufacturing_other" if industry == "91-All other manufacturing branches"
replace industry = "utilities" if industry == "E-Electricity, gas, water supply"
replace industry = "construction" if industry == "F-Construction"
replace industry = "research" if industry == "P-Education/research/development"
replace industry = "services" if industry == "90-All other non-manufacturing branches"
replace industry = "unspecified" if industry == "99-Unspecified"

save "$clean_data_automation/robot_data_full.dta", replace

**The aggregation is done both for the *stock* of robots and the *flow* of robots, hence the loop**
foreach vartype in  stock flow {

*Opens IFR raw data*
use "$clean_data_automation/robot_data_full.dta", clear

replace country ="United States" if country == "North America"
*Defines set of countries to compute the data from*
gen usa=0
replace usa=1 if country=="United States" 

*European countries with sufficient data*
gen euro_full=0
replace euro_full=1 if country=="Germany"
replace euro_full=1 if country=="Spain"
replace euro_full=1 if country=="Finland"
replace euro_full=1 if country=="France"
replace euro_full=1 if country=="Italy"
replace euro_full=1 if country=="Sweden"
replace euro_full=1 if country=="United Kingdom" 
replace euro_full=1 if country=="Denmark"
replace euro_full=1 if country=="Norway" 

keep if euro_full==1 | usa==1 //removes CH

gen `vartype'_raw=`vartype' 

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Step 1: Imputes robot data using proportions of first year in which >50% of the data is classified
-does it for 17 broad industries [we divide metals later on]
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

*Compute share of data classified by industry. Tag countries where this share is below 50%. Only countries in which the data is not classified in its majority are Denmark (from 1993-1998) and U.S. (from 1993-2008)*
bys country year: egen all_`vartype'=total(`vartype'*(industry=="all"))
bys country year: egen unc_`vartype'=total(`vartype'*(industry=="unspecified"))
gen share_classified=1-unc_`vartype'/all_`vartype'
assert share_classified!=.
gen tag_classified=(share_classified>0.5)
tostring year, gen(styear)
gen country_year=country+styear
tab country_year if tag_classified==0
bys country: list country year share_classified if industry=="all"

*Defines 17 industries for which we will start allocating the unclassified robots*
gen b17_ind=0
replace b17_ind=1 if   industry=="agriculture" ///
                     | industry=="automotive" ///
					 | industry=="petrochemicals" ///
					 | industry=="electronics" ///
					 | industry=="construction" ///
					 | industry=="research" ///
					 | industry=="food" ///
					 | industry=="furniture" ///
					 | industry=="mineral" ///
					 | industry=="manufacturing_other" ///
					 | industry=="metal" ///
					 | industry=="mining" ///
					 | industry=="paper" ///
					 | industry=="textiles" ///
					 | industry=="utilities" ///
					 | industry=="vehicles_other" ///
					 | industry=="services"
					 
*Asserts that unclassified robots account for residual*
bys year country: egen partial_`vartype'=total(`vartype'*(b17_ind==1))
assert partial_`vartype'==all_`vartype'-unc_`vartype'

*Allocate the residual category in fixed proportions for years in which data is available*
gen `vartype'_b17=`vartype'+unc_`vartype'*`vartype'/(all_`vartype'-unc_`vartype') if tag_classified==1 & b17_ind==1
assert `vartype'_b17!=. if b17_ind==1 & tag_classified==1

*Test that the 17 categories account for aggregate (in country-years with classified data)*
bys country year: egen test_b17=total(`vartype'_b17*(b17_ind==1))
assert abs(test_b17-all_`vartype')<0.01 if tag_classified==1 
drop test_b17

*Following Graetz and Michaels, for the US and Denmark, allocate the residual category back in time using proportions from the earlier year in which the data was classified in its majority*
gen negyear=-year
sort country industry negyear
bys country industry: replace `vartype'_b17=`vartype'+unc_`vartype'[_n]*`vartype'_b17[_n-1]/all_`vartype'[_n-1]  if `vartype'_b17==. & tag_classified==0 & b17_ind==1
bys country year: egen test_b17=total(`vartype'_b17*(b17_ind==1))

*verify the data for the 17 categories is complete and available for all years.  Then test all robtos allocated to the 17 industries*
assert abs(test_b17-all_`vartype')<0.01
drop test_b17
bys country: tab b17_ind year if `vartype'_b17!=.

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Step 2: Separates metal industries into basic metals, metal machinery and metal products.
Pool metal machinery (industry 28 in IFR files) with the residual category (industry 289).
This last step accounts for some relabelling of industries in 2004 in all european countries.
As before, The US and Denmark need to be handled separately 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 
gen `vartype'_b19=`vartype'_b17 if b17_ind==1 & industry!="metal"

gen metal_ind=0
replace metal_ind=1 if industry=="metal_basic" ///
                     | industry=="metal_machinery" ///
					 | industry=="metal_products"
					 
bys country year: egen metal_unc_`vartype'=total(`vartype'*(industry=="metal_unspecified"))
bys country year: egen metal_tot_`vartype'=total(`vartype'*(industry=="metal_unspecified"|metal_ind==1))

*compute share of unclassified robots in metal industries*
gen share_metal_classified=1-metal_unc_`vartype'/metal_tot_`vartype'
bys country: list country year share_classified if industry=="metal"

*pool other metals and industrial machinery (separated in 2004)*
replace `vartype'=`vartype'+metal_unc_`vartype' if industry=="metal_machinery"
replace `vartype'=0 if industry=="metal_unspecified"

*inflate categoriess to match total allocation in metals
bys country year: egen `vartype'_metal=total(`vartype'*(metal_ind==1))
bys country year: egen `vartype'_metal_adj=total(`vartype'_b17*(industry=="metal"))
replace `vartype'_b19=`vartype'*`vartype'_metal_adj/`vartype'_metal if metal_ind==1
replace `vartype'_b19=0 if `vartype'_metal_adj==0 & metal_ind==1
tab country_year if `vartype'_b19==. & metal_ind==1

*We still  need to allocate robots in the metal industry for Denmark and the US*
*For US start in 2014, the first year with a sufficient classification within metals*
*For Denmark, start in 2002, the first year with a sufficient  classification within metals
if "`vartype'"=="stock"{
replace `vartype'_b19=. if country=="United States" & metal_ind==1 & year<=2013
replace `vartype'_b19=. if country=="Denmark" & metal_ind==1 & year<=2001 
}
*for US and Denmark, extrapolate back in time the use of robots within the different metal industries*
sort country industry negyear
bys country industry: replace `vartype'_b19=`vartype'_metal_adj[_n]*`vartype'_b19[_n-1]/`vartype'_metal_adj[_n-1]  if `vartype'_b19==. & metal_ind==1

*defines the 19 industries used in the paper and test all robots were allocated*
gen b19_ind=0
replace b19_ind=1   if industry=="agriculture" ///
                     | industry=="automotive" ///
					 | industry=="petrochemicals" ///
					 | industry=="electronics" ///
					 | industry=="construction" ///
					 | industry=="research" ///
					 | industry=="food" ///
					 | industry=="furniture" ///
					 | industry=="mineral" ///
					 | industry=="manufacturing_other" ///
					 | industry=="metal_basic" ///
					 | industry=="metal_machinery" ///
					 | industry=="metal_products" ///
					 | industry=="mining" ///
					 | industry=="paper" ///
					 | industry=="textiles" ///
					 | industry=="utilities" ///
					 | industry=="vehicles_other" ///
					 | industry=="services"
					 
bys country year: egen test_b19=total(`vartype'_b19*(b19_ind==1))
assert abs(test_b19-all_`vartype')<0.01
drop test_b19
bys country: tab b19_ind year if `vartype'_b19!=.


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Step 3: save the data
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 
keep if b19_ind==1
rename industry industry_ifr19
keep country industry_ifr19 year `vartype'_b19
save "$clean_data_automation/robot_`vartype'_ifr19.dta", replace

}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Step 4: Adjust for reclassification of "manufacturing other" and "other vehicles"
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
use "$clean_data_automation/robot_stock_ifr19.dta"
merge 1:1 country year industry_ifr19 using "$clean_data_automation/robot_flow_ifr19.dta", assert(1 3) nogenerate
sort country industry_ifr year
sort country industry year

* construct alternative series based on perpetual inventory method and using a 4% depreciation rate (this roughly matches the stocks reported by the IFR)* 
bys country industry: gen stock_b19_alt=stock_b19[1] if _n==1
bys country industry: replace stock_b19_alt=0.96*stock_b19_alt[_n-1]+flow_b19[_n-1] if _n>1

sort country industry year
gen stock_b19_orig=stock_b19

* manufacturing_other	Finland:	APR goes from 11.64 robots per 1k workers to 1.84; then back to 10.45
sort country industry year
bys country industry: replace stock_b19=stock_b19_alt-stock_b19_alt[15]+stock_b19_orig[15]  if industry=="manufacturing_other" & country=="Finland"

* manufacturing_other	Sweden:		APR goes from 16.65 robots per 1k workers to 4.18; then back to 15.28
sort country industry year
bys country industry: replace stock_b19=stock_b19_alt-stock_b19_alt[15]+stock_b19_orig[15]  if industry=="manufacturing_other" & country=="Sweden"

* vehicles_other		Finland:	APR goes from 2.30 robots per 1k workers to 0.72
sort country industry year
bys country industry: replace stock_b19=stock_b19_alt-stock_b19_alt[15]+stock_b19_orig[15]  if industry=="vehicles_other" & country=="Finland"

* vehicles_other		Italy:	    APR goes from 15.56 robots per 1k workers to 2.76; break in 2004-2005
sort country industry year
bys country industry: replace stock_b19=stock_b19_alt-stock_b19_alt[15]+stock_b19_orig[15]  if industry=="vehicles_other" & country=="Italy"

* assert non missings or negative stocks *
assert stock_b19!=. & stock_b19>=0

save "$clean_data_automation/robot_data_ifr19.dta", replace
